###### Prediction after Regression ######
### Author: Nils-Christian Bormann
### This version: 11 Mar 2012
### Task: Program a function that predicts values after MLE


prediction <- function(coef = NA, vcv = NA, data = NA, model = NA, setx = NULL, setx1=NULL, position = NA, position1=NULL, firstd = FALSE, setcontrol = NA, sim = NA, ci = NA){
	
	## Check and prepare input
	# load libraries
	require(mgcv)
	require(MASS)
	require(mvtnorm)
		
	# check input and possibly produce errors
	if(length(coef) != dim(vcv)[1]) stop("Error: Non-matching number of coefficients and standard errors.")
	if(!is.null(dim(data))){if(length(coef) != dim(data)[2]){stop("Error: Non-matching number of coefficients and variables.")}}
	if(!is.null(dim(data))){if(dim(vcv)[2] != dim(data)[2]){stop("Error: Non-matching number of standard errors and variables.")}}
	if(!is.null(setcontrol)){if(all(is.na(setcontrol))){stop("Error: Vector of control variables must not include misisng values.")}}
	if(all(!is.na(setcontrol))){if(length(coef) != (length(setcontrol))){stop("Error: Non-matching number of coefficients and variable values.")}} # add 1 due to missing constant from data and/or set control
	if(all(!is.na(setcontrol))){if(dim(vcv)[2] != (length(setcontrol))){stop("Error: Non-matching number of standard errors and variable values.")}} # add 1 due to missing constant from data and/or set control
	if(!is.character(model)) stop("Error: Model needs to be of type 'character'.")
	if(is.null(dim(data)) & is.na(setcontrol)[1]) stop("Error: Either a data frame or the setcontrol vector must be provided.")
	if(!is.na(ci)[1]){ if(!is.element(ci, c(90,95))) stop("Error: CI must be one of 90 or 95.")}
	if(!is.na(setcontrol)[1]){ if(is.element(NA, setcontrol)) stop("Error: Setcontrol vector cannot include missing values.")}	
	if(!is.logical(firstd)) stop("Error: firstd must be one of TRUE, FALSE, T, F.")
	
	# delete missing
	if(length(unlist(data[is.na(data)])) > 1) data <- na.omit(data)
	
	# Set standard values
	if(is.na(position)[1]) position <- 1
	if(is.na(sim)) sim <- 2000
	if(is.na(ci)) ci <- 95
	
	# Set data types
	coef <- coef
	vcv <- as.matrix(vcv)
	
	# Available link functions
	logit <- function(x) exp(x)/(1+exp(x))
	probit <- function(x) pnorm(x)
	
	link <- c(logit, probit)
	names(link) <- c("logit", "probit")
	if(!is.element(as.character(model) ,names(link))) {
		stop("Error: Model link not available.")
	}


	## Set control variables for prediction
	if(any(is.na(setcontrol))){
		# obtain variable types (1 = continuous, 0 = nominal)
		ctype <- apply(as.matrix(data), 2, function(x){
			if(length(table(x)) == 2){ctype <- 0}
				else{ctype <- 1}
			return(ctype)
		})
		
		# set variable values (either mean or modal value) 
		xval <- rep(0, dim(data)[2])
		for(i in 1:dim(data)[2]){
			xval[i] <- if(ctype[i]==1){
						mean(data[,i])
						} else{
							ifelse(mean(data[,i]) > .5, 1, 0)
						}
		}
	} else{xval <- setcontrol}
	
	
	## Prepare matrix for predictions
	if(is.null(dim(setx))){
		X <- matrix(xval, nrow=length(setx), ncol=length(xval), byrow=T)
	} else{
		X <- matrix(xval, nrow=dim(setx)[1], ncol=length(xval), byrow=T)
	}
	
	## Set values for variable of interest
	X[,position] <- setx
	
	## Prepare alternative matrix for predictions
	if(firstd==T){
		if(is.null(dim(setx1))){
				X1 <- matrix(xval, nrow=1, ncol=length(xval), byrow=T) ## this is problematic - I don't distinguish between changing multiple values on more than one variable and multiple values on one variable 
			} else{
				X1 <- matrix(xval, nrow=dim(setx1)[1], ncol=length(xval), byrow=T) ## this is problematic
			}
		} else{
		if(!is.null(dim(setx1))){ # probably to change multiple values at same time w/o first differences
			X1 <- matrix(xval, nrow=dim(setx1)[1], ncol=length(xval), byrow=T)
		}	
	}
	
	
	## Set values for variable of interest for first differences 
	if(!is.null(position1)){
		X1[,position1] <- setx1
	}

	## Prepare output matrix
	yhat <- matrix(NA,nrow=dim(X)[1], ncol=sim) # matrix with length of values of interest and width of draws from multivariate normal distribution
	
	## Simulate
	if(firstd==F){
		for(i in 1:sim){
			# draw simulated coefficients and multiply with value constellations
			xbeta <- X %*% t(rmvnorm(1, coef, vcv, method="eigen"))
			# functional form transformation
			yhat[,i] <- logit(xbeta) 
		} # end simulation for predicted probabilities
	} else{ 
		for(i in 1:sim){
			# draw simulated coefficients and multiply with value constellations
			yhat[,i] <- logit(X1 %*% t(rmvnorm(1, coef, vcv, method="eigen"))) -
						logit(X %*% t(rmvnorm(1, coef, vcv, method="eigen")))
					 
		} # end simulation for first differences
	} # end simulations
	
	## Prepare output (confidence intervals)
	if(ci==95){bounds <- c(.025, 0.975)} else{bounds <- c(.05, .95)} 
	
	## Output 
	# collapse yhat along simulations for mean, lower and upper bound
	yhat.qi <- apply(yhat, 1, quantile, probs = c(bounds[1], 0.5, bounds[2]))
	pp <- yhat.qi
	
	print(pp)
}
